{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from surprise import BaselineOnly\n",
    "from surprise import Dataset\n",
    "from surprise import Reader\n",
    "from surprise.model_selection import cross_validate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# path to dataset file\n",
    "file_path = os.path.expanduser('~/.surprise_data/ml-100k/ml-100k/u.data')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/home/jockeyyan/.surprise_data/ml-100k/ml-100k/u.data\n"
     ]
    }
   ],
   "source": [
    "print(file_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# As we're loading a custom dataset, we need to define a reader. In the\n",
    "# movielens-100k dataset, each line has the following format:\n",
    "# 'user item rating timestamp', separated by '\\t' characters.\n",
    "reader = Reader(line_format='user item rating timestamp', sep='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "data = Dataset.load_from_file(file_path, reader=reader)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Estimating biases using als...\n",
      "Estimating biases using als...\n",
      "Estimating biases using als...\n",
      "Estimating biases using als...\n",
      "Estimating biases using als...\n",
      "Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).\n",
      "\n",
      "                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     \n",
      "RMSE (testset)    0.9502  0.9460  0.9427  0.9481  0.9329  0.9440  0.0061  \n",
      "MAE (testset)     0.7514  0.7512  0.7481  0.7520  0.7396  0.7485  0.0046  \n",
      "Fit time          0.19    0.22    0.21    0.20    0.20    0.20    0.01    \n",
      "Test time         0.14    0.09    0.13    0.13    0.13    0.12    0.02    \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'fit_time': (0.1881873607635498,\n",
       "  0.2153491973876953,\n",
       "  0.20892763137817383,\n",
       "  0.2015390396118164,\n",
       "  0.1997673511505127),\n",
       " 'test_mae': array([ 0.75135479,  0.75122802,  0.74809068,  0.75197995,  0.73962581]),\n",
       " 'test_rmse': array([ 0.9501566 ,  0.94597997,  0.94274699,  0.94805282,  0.93286402]),\n",
       " 'test_time': (0.1353447437286377,\n",
       "  0.09236693382263184,\n",
       "  0.13039946556091309,\n",
       "  0.1338942050933838,\n",
       "  0.12953591346740723)}"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# We can now use this dataset as we please, e.g. calling cross_validate\n",
    "cross_validate(BaselineOnly(), data, verbose=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
